{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2023-09-05T08:34:47.581920800Z",
     "start_time": "2023-09-05T08:34:41.002113Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "数据集的描述\n",
      " 这是李铭晗写的数据集\n",
      "数据内容(data:\n",
      " ['WEB开发\\r\\nflask,springboot,vue\\r\\nDjango', '马云与阿里巴巴\\r\\n京东物流', '现金流是一个企业的血液', '进化论,相对论', '资产负债表,企业管理\\r\\nMBA 会计', '机器学习\\r\\nsklearn,pytorch,\\r\\n深度学习', '霍金与时间简史']\n",
      "类别对应的名称:\n",
      " ['business', 'science', 'technology']\n"
     ]
    }
   ],
   "source": [
    "# 文件导入方法\n",
    "from sklearn.datasets import load_files\n",
    "\n",
    "data = load_files(\"MyDataSet\",load_content=True,encoding='utf-8',description='这是李铭晗写的数据集')\n",
    "print(\"数据集的描述\\n\",data.DESCR)\n",
    "print(\"数据内容(data:\\n\",data.data)\n",
    "print(\"类别对应的名称:\\n\",data.target_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "每个文件对应的名称:\n",
      " ['MyDataSet\\\\technology\\\\0001.txt' 'MyDataSet\\\\business\\\\0003.txt'\n",
      " 'MyDataSet\\\\business\\\\0002.txt' 'MyDataSet\\\\science\\\\0001.txt'\n",
      " 'MyDataSet\\\\business\\\\0001.txt' 'MyDataSet\\\\technology\\\\00002.txt'\n",
      " 'MyDataSet\\\\science\\\\0002.txt']\n",
      "每个文件对应的标签(0,1,2):\n",
      " [2 0 0 1 0 2 1]\n"
     ]
    }
   ],
   "source": [
    "print(\"每个文件对应的名称:\\n\",data.filenames)\n",
    "print(\"每个文件对应的标签(0,1,2):\\n\",data.target)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-09-05T08:39:00.133434900Z",
     "start_time": "2023-09-05T08:39:00.125451800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "每个文件标签的对应版本:\n",
      " ['technology', 'business', 'business', 'science', 'business', 'technology', 'science']\n"
     ]
    }
   ],
   "source": [
    "labels = []\n",
    "for index in data.target:\n",
    "    labels.append(data.target_names[index])\n",
    "print('每个文件标签的对应版本:\\n',labels)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-09-05T08:47:31.902874500Z",
     "start_time": "2023-09-05T08:47:31.874875300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
